1 Setup

1.1 Packages

library(tidyverse)
library(cowplot)
library(GGally)
library(heatmaply)
library(sva)
library(limma)
library(biobroom)
library(ggridges)
library(ggthemes)
theme_set(theme_minimal())

1.2 Data

1.2.1 Metabolite Abundances

folr1.neg.raw <- read_csv("./data/hilic_target_results/prepared_abundances/folr1_hilic_target_negmode_abundance.csv") %>% 
  mutate(Diet = ifelse(
    Diet == "2ppm", "2ppm_FA",
    ifelse(Diet == "10ppm", "10ppm_FA", Diet)
    ))
folr1.pos.raw <- read_csv("./data/hilic_target_results/prepared_abundances/folr1_hilic_target_posmode_abundance.csv") %>% 
  mutate(Diet = ifelse(
    Diet == "2ppm", "2ppm_FA",
    ifelse(Diet == "10ppm", "10ppm_FA", Diet)
    ))
setequal(folr1.neg.raw$Samples, folr1.pos.raw$Samples)
[1] FALSE

1.2.2 Compound Information

folr1.neg.compound.info <- read_csv("./data/hilic_target_results/prepared_compound_info/folr1_normal_genotype_hilic_neg_cmpnd_info.csv")
folr1.pos.compound.info <- read_csv("./data/hilic_target_results/prepared_compound_info/folr1_normal_genotype_hilic_pos_cmpnd_info.csv")

1.3 Functions

MissingPerSamplePlot <- function(raw.data, start.string) {
  # Counts the number of missing/NA values per sample and
  # percent compounds missing out of total number of compounds per sample
  # Then passes the result into a vertical bar plot, where each 
  # bar represents a single sample and the size of the bar 
  # is the % of compounds missing
  
  counted.na <- raw.data %>%
  select(starts_with(start.string)) %>% 
  mutate(
    count.na = apply(., 1, function(x) sum(is.na(x))),
    percent.na = (count.na / ncol(raw.data %>% select(starts_with(start.string)))) * 100
    ) %>%
  dplyr::select(count.na, percent.na) %>%
  bind_cols(
    raw.data %>% 
      select(Samples, Type)
      ) %>% 
  arrange(percent.na) %>% 
  mutate(f.order = factor(Samples, levels = Samples))
counted.na %>% 
  ggplot(aes(x = f.order, y = percent.na, fill = Type)) +
  geom_bar(stat = "identity") +
  geom_hline(yintercept = 20, color = "gray", size = 1, alpha = 0.8) +
  coord_flip()+
  xlab("Samples") +
  ylab("Percent missing values in sample") +
  theme(axis.text.y = element_text(size = 6)) 
}
MissingPerCompound <- function(raw.data, start.string){
  # Function to count in how many experimental samples each compound is missing
  # Also calculates the % missing:
  # (# NA per compound across all experimental samples) * 100 / (tot num of samples)
  
  raw.data %>% 
  filter(Type == "sample") %>% 
  select(Samples, starts_with(start.string)) %>% 
  gather(key = "Compound", value = "Abundance", -Samples) %>% 
  group_by(Compound) %>% 
  summarise(
    na_count = sum(is.na(Abundance)),
    n_samples = n(),
    percent_na = (na_count * 100 / n_samples)
    ) %>% 
  filter(na_count > 0) %>% 
  arrange(desc(percent_na))
}
ReplaceNAwMinLogTransform <- function(raw.dataframe, start.prefix, type) {
  # Function to replace any NAs in each column with the minimum for that column, 
  # separately for each data.set (patient and dabase samples separately)
  #
  # Returns: returns data frame(? check on this) with only the values bound by the function args
  # 
  smpls <- raw.dataframe %>%
    filter(Type == "sample") %>% 
    dplyr::select(starts_with(start.prefix))
  smpls.min <- lapply(smpls, min, na.rm = TRUE)
  # replace the missing values in the real samples with the minimum of the samples
  # then take the log
  smpls.noNA <- raw.dataframe  %>%
    filter(Type == "sample") %>% 
    dplyr::select(Samples:Prot_quant) %>%
    bind_cols(
      smpls %>%
        replace_na(replace = smpls.min) %>% 
        log2()
      ) 
  # replace missing mix values with values within the mix groups
  mix <- raw.dataframe %>%
    filter(Type == "mix") %>% 
    dplyr::select(starts_with(start.prefix))
  mix.min <- lapply(mix, min, na.rm = TRUE)
  # deal with cases where there is no min:
  mix.min <- lapply(mix.min, function(x) ifelse(is.infinite(x),2,x))
  mix.noNA <- raw.dataframe  %>%
    filter(Type == "mix") %>% 
    dplyr::select(Samples:Prot_quant) %>%
    bind_cols(
      mix %>%
        replace_na(replace = mix.min) %>% 
        log2()
      )
  # replace the missing values in solv and empty samples with 1 - for PCA analysis
  # then take the log
  other.min <- setNames(
    as.list(
      rep(2, ncol(
        raw.dataframe %>% 
          dplyr::select(starts_with(start.prefix))))
      ),
    colnames(raw.dataframe %>% dplyr::select(starts_with(start.prefix)))
                )
  other.num.log <- raw.dataframe %>%
    filter(Type == "solv") %>% 
    dplyr::select(Samples:Prot_quant) %>%
    bind_cols(
      raw.dataframe %>% 
        filter(Type == "solv") %>% 
        dplyr::select(starts_with(start.prefix)) %>%
        replace_na(replace = other.min) %>% 
        log2()
      )
  # combine them together back into one data frame
  all.noNA <- smpls.noNA %>% 
    bind_rows(mix.noNA) %>% 
    bind_rows(other.num.log)
}
HeatmapPrepAlt <- function(raw.data, start.prefix){
  # function for preparing dara for heatmap viz
  x <- raw.data %>% 
    select(starts_with(start.prefix)) %>% 
    scale(center = TRUE, scale = TRUE) %>% 
    as.matrix() 
  row.names(x) <- raw.data$Samples
  return(x)
}

2 Data Exploration

2.1 Mass vs Retention Time

Q: What are the distributions of compound masses and retention times?

full.folr1.cmpnd <- folr1.neg.compound.info %>% 
  mutate(Set = "Neg") %>% 
  bind_rows(folr1.pos.compound.info %>% mutate(Set = "Pos"))
full.folr1.cmpnd %>% 
  ggplot(aes(x = RT, y = Mass)) +
  geom_point(size = 3, alpha = 0.3) +
  xlab("Retention Time (min)") +
  ylab("Mass (Da)") +
  ggtitle("Mass v RT\nFolR1 Exp 1") +
  ylim(0, 1000)

full.folr1.cmpnd %>% 
  ggplot(aes(x = RT, y = Mass, color = Set)) +
  geom_point(size = 3, alpha = 0.8) +
  xlab("Retnetion Time (min)") +
  ylab("Mass (Da)") +
  ggtitle("Mass v RT\nFolR1 Exp 1") +
  facet_wrap(~ Set) +
  ylim(0, 1000)

Q: Which compounds were found in one or more of the data types?

folr1.cmpnd.join <- folr1.neg.compound.info %>% 
  inner_join(folr1.pos.compound.info, by = "CAS_ID", suffix = c("_n", "_p")) %>% 
  select(
    CAS_ID, contains("short"), 
    contains("full"), starts_with("Formula"), 
    starts_with("Mass"), starts_with("RT")
    )
# compound names - found in pos and neg mode / cells 
print(folr1.cmpnd.join$compound_full_n)
 [1] "Glycine"                                          
 [2] "Butyric Acid"                                     
 [3] "Alanine"                                          
 [4] "Sarcosine"                                        
 [5] "BAIBA"                                            
 [6] "2-Aminobutyric Acid"                              
 [7] "Serine"                                           
 [8] "Hypotaurine"                                      
 [9] "Cytosine"                                         
[10] "Uracil"                                           
[11] "Creatinine"                                       
[12] "Proline"                                          
[13] "Valine"                                           
[14] "Threonine"                                        
[15] "Taurine"                                          
[16] "Thymine"                                          
[17] "Ketoleucine"                                      
[18] "5-Aminolevulinic Acid"                            
[19] "Leucine"                                          
[20] "Isoleucine"                                       
[21] "Asparagine"                                       
[22] "Ornithine"                                        
[23] "Aspartic Acid"                                    
[24] "Adenine"                                          
[25] "Hypoxanthine"                                     
[26] "O-Phosphoethanolamine"                            
[27] "Glutamine"                                        
[28] "Lysine"                                           
[29] "Glutamic Acid"                                    
[30] "Methionine"                                       
[31] "Guanine"                                          
[32] "Xanthine"                                         
[33] "Histidine"                                        
[34] "Allantoin"                                        
[35] "2-Aminoadipic Acid"                               
[36] "Phenylalanine"                                    
[37] "Pyridoxal"                                        
[38] "Dihydroxyacetone-phosphate"                       
[39] "Glycerol 2-Phosphate"                             
[40] "N-alpha-Acetylasparagine"                         
[41] "Arginine"                                         
[42] "Citrulline"                                       
[43] "Tyrosine"                                         
[44] "D-Mannitol"                                       
[45] "Phosphocholine"                                   
[46] "N-Acetylmethionine"                               
[47] "Tryptophan"                                       
[48] "Phosphocreatine"                                  
[49] "Pantothenic Acid"                                 
[50] "Deoxycytidine"                                    
[51] "Deoxyuridine"                                     
[52] "Cytidine"                                         
[53] "Uridine"                                          
[54] "Biotin"                                           
[55] "D-Glucose 6-phosphate"                            
[56] "Thiamine (Vit B1)"                                
[57] "2'-Deoxyguanosine"                                
[58] "Inosine"                                          
[59] "Guanosine"                                        
[60] "Stearic Acid"                                     
[61] "Ophthalmic Acid"                                  
[62] "5'-Methylthioadenosine"                           
[63] "Retinoic Acid"                                    
[64] "N-Acetylaspartyl Glutamic Acid"                   
[65] "Glutathione (GSH)"                                
[66] "N-Acetylneuraminic Acid"                          
[67] "CMP"                                              
[68] "UMP"                                              
[69] "3-Phosphoglyceroinositol"                         
[70] "Sucrose"                                          
[71] "AMP"                                              
[72] "dGMP"                                             
[73] "GMP"                                              
[74] "S-Lactoylglutathione"                             
[75] "S-Adenosylhomocysteine (SAH)"                     
[76] "CDP"                                              
[77] "UDP"                                              
[78] "ADP"                                              
[79] "dGDP"                                             
[80] "GDP"                                              
[81] "Adenylosuccinic Acid"                             
[82] "UTP"                                              
[83] "CDP-Choline"                                      
[84] "ATP"                                              
[85] "GTP"                                              
[86] "Cyclic adenosine diphosphate ribose (cADP-ribose)"
[87] "ADP-Ribose"                                       
[88] "UDP-Glucose"                                      
[89] "ADP-Glucose"                                      
[90] "UDP-N-Acetylglucosamine"                          
[91] "GSSG"                                             
[92] "NAD"                                              
# percent of cell / neg compounds found in cell / pos 
round(nrow(folr1.cmpnd.join) * 100 / nrow(folr1.neg.compound.info), 1)
[1] 58.2
# percent of cell / neg compounds found in cell / pos 
round(nrow(folr1.cmpnd.join) * 100 / nrow(folr1.pos.compound.info), 1)
[1] 60.9
# problem checks #
folr1.cmpnd.join %>% 
  select(contains("Mass")) %>% 
  ggpairs()

folr1.cmpnd.join %>% 
  select(starts_with("RT")) %>% 
  ggpairs()

2.2 Missing Values

Q: Do any of the samples have greater than 20% missing (NA) compound abundances, out of all of the features in the dataset?

MissingPerSamplePlot(folr1.neg.raw, "nC") +
  ggtitle("Missing Per Sample\nFolR1 / Exp 1 / Neg Mode")

MissingPerSamplePlot(folr1.pos.raw, "pC") +
  ggtitle("Missing Per Sample\nFolR1 / Exp 1 / Pos Mode")

A: Remove set2_purple_632_R1 from neg mode, pos mode is fine.

folr1.neg.raw <- folr1.neg.raw %>% 
  filter(Samples != "set2_purple_632_R1")

Q: Are any of the compounds more than 25% missing in the experimental sample group? If there are any, they will be excluded from analysis. (Threshold increased from 20 to 25% because this is from mouse embryos, which can be expected to have a lower signal).

(folr1.neg.cmpnd.to.excl <- MissingPerCompound(folr1.neg.raw, "nC") %>% 
  filter(percent_na > 25))
# A tibble: 8 x 4
  Compound na_count n_samples percent_na
  <chr>       <int>     <int>      <dbl>
1 nC57           49        88       55.7
2 nC148          48        88       54.5
3 nC97           37        88       42.0
4 nC79           36        88       40.9
5 nC111          35        88       39.8
6 nC78           31        88       35.2
7 nC150          29        88       33.0
8 nC144          25        88       28.4
MissingPerCompound(folr1.pos.raw, "pC") %>% 
  filter(percent_na > 25)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
#   percent_na <dbl>

2.3 Negative Controls and Compound Elimination

2.3.1 Negative Mode

folr1.neg.raw.type.mean <- folr1.neg.raw %>% 
  group_by(Type) %>% 
  summarize_at(vars(matches("nC")), mean, na.rm = TRUE) %>% 
  gather(key = "Compound", value = "Type_mean_abun", -Type)
folr1.neg.raw.type.mean %>% 
  ggplot(aes(log2(Type_mean_abun), color = Type)) +
  geom_density(size = 2, alpha = 0.8) +
  ggtitle("Distribution of compound means\nFolR1 Exp 1 / Negative Mode\nGrouped by sample type")

folr1.neg.raw.type.mean.order <- folr1.neg.raw.type.mean %>% 
  filter(Type == "sample") %>% 
  arrange(Type_mean_abun)
folr1.neg.raw %>% 
  select(Samples, Type, starts_with("nC")) %>% 
  gather("Compound", value = "Abundance", -c(Samples, Type)) %>% 
  mutate(Cmpnd_sort = factor(Compound, levels = folr1.neg.raw.type.mean.order$Compound)) %>% 
  ggplot(aes(Cmpnd_sort, log2(Abundance), color = Type, group = Samples)) + 
  geom_line(alpha = 0.1, size = 1) + 
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  xlab("Compound") +
  geom_line(
    data = folr1.neg.raw.type.mean %>% 
      mutate(Cmpnd_sort = factor(Compound, levels = folr1.neg.raw.type.mean.order$Compound)), 
    aes(Cmpnd_sort, log2(Type_mean_abun), color = Type, group = Type),
    size = 1
    ) +
  ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nFolR1 Exp 1 / Negative Mode") +
  scale_color_brewer(palette = "Dark2")

folr1.neg.raw.type.mean %>% 
  mutate(Cmpnd_sort = factor(Compound, levels = folr1.neg.raw.type.mean.order$Compound)) %>% 
  ggplot(aes(Cmpnd_sort, log2(Type_mean_abun), color = Type, group = Type)) +
  geom_point(size = 1, alpha = 0.8) +
  geom_line(alpha = 0.8) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  xlab("Compound") +
  ylab("log2(Sample Type Mean)") +
  ggtitle("Profile Plot of compound means by sample type only\nFolR1 Exp 1 / Negative Mode") +
  scale_color_brewer(palette = "Dark2")

folr1.neg.raw.type.diff <- folr1.neg.raw.type.mean %>% 
  spread(Type, Type_mean_abun) %>% 
  mutate(smpl_solv_diff = sample / solv)
folr1.neg.raw.type.diff %>% 
  ggplot(aes(log2(smpl_solv_diff))) +
  geom_histogram(bins = 50)

# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
folr1.neg.cmpnd.to.incl <- folr1.neg.raw.type.diff  %>% 
  filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff)) %>% 
  filter(!(Compound %in% folr1.neg.cmpnd.to.excl$Compound))
# original number of metabolites
nrow(folr1.neg.raw.type.diff)
[1] 158
# number of metabolites after filtering 
nrow(folr1.neg.cmpnd.to.incl)
[1] 150

2.3.2 Positive Mode

folr1.pos.raw.type.mean <- folr1.pos.raw %>% 
  group_by(Type) %>% 
  summarize_at(vars(matches("pC")), mean, na.rm = TRUE) %>% 
  gather(key = "Compound", value = "Type_mean_abun", -Type)
folr1.pos.raw.type.mean %>% 
  ggplot(aes(log2(Type_mean_abun), color = Type)) +
  geom_density(size = 2, alpha = 0.8) +
  ggtitle("Distribution of compound means\nFolR1 Exp 1 / Positive Mode\nGrouped by sample type")

folr1.pos.raw.type.mean.order <- folr1.pos.raw.type.mean %>% 
  filter(Type == "sample") %>% 
  arrange(Type_mean_abun)
folr1.pos.raw %>% 
  select(Samples, Type, starts_with("pC")) %>% 
  gather("Compound", value = "Abundance", -c(Samples, Type)) %>% 
  mutate(Cmpnd_sort = factor(Compound, levels = folr1.pos.raw.type.mean.order$Compound)) %>% 
  ggplot(aes(Cmpnd_sort, log2(Abundance), color = Type, group = Samples)) + 
  geom_line(alpha = 0.1, size = 1) + 
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  xlab("Compound") +
  geom_line(
    data = folr1.pos.raw.type.mean %>% 
      mutate(Cmpnd_sort = factor(Compound, levels = folr1.pos.raw.type.mean.order$Compound)), 
    aes(Cmpnd_sort, log2(Type_mean_abun), color = Type, group = Type),
    size = 1
    ) +
  ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nFolR1 Exp 1 / Positive Mode") +
  scale_color_brewer(palette = "Dark2")

folr1.pos.raw.type.mean %>% 
  mutate(Cmpnd_sort = factor(Compound, levels = folr1.pos.raw.type.mean.order$Compound)) %>% 
  ggplot(aes(Cmpnd_sort, log2(Type_mean_abun), color = Type, group = Type)) +
  geom_point(size = 1, alpha = 0.8) +
  geom_line(alpha = 0.8) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  xlab("Compound") +
  ylab("log2(Sample Type Mean)") +
  ggtitle("Profile Plot of compound means by sample type only\nFolR1 Exp 1 / Positive Mode") +
  scale_color_brewer(palette = "Dark2")

folr1.pos.raw.type.diff <- folr1.pos.raw.type.mean %>% 
  spread(Type, Type_mean_abun) %>% 
  mutate(smpl_solv_diff = sample / solv)
folr1.pos.raw.type.diff %>% 
  ggplot(aes(log2(smpl_solv_diff))) +
  geom_histogram(bins = 50)

# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
folr1.pos.cmpnd.to.incl <- folr1.pos.raw.type.diff %>% 
  filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff))
# original number of metabolites
nrow(folr1.pos.raw.type.diff)
[1] 151
# number of metabolites after filtering 
nrow(folr1.pos.cmpnd.to.incl)
[1] 144

3 Data Prep and Preliminary Analysis

3.1 Cleanup

folr1.neg.noNA <- folr1.neg.raw %>% 
  select(Samples:Prot_quant, one_of(folr1.neg.cmpnd.to.incl$Compound)) %>% 
  ReplaceNAwMinLogTransform("nC")
folr1.pos.noNA <- folr1.pos.raw %>% 
  select(Samples:Prot_quant, one_of(folr1.pos.cmpnd.to.incl$Compound)) %>% 
  ReplaceNAwMinLogTransform("pC")

3.2 Distribution plots

3.2.1 Negative Mode

folr1.neg.noNA.gathered <- folr1.neg.noNA %>% 
  gather(
    key = "Metabolite", "Abundance", 
    which(colnames(folr1.neg.noNA) == "nC1"):ncol(folr1.neg.noNA)
    )
# plot all abundances in a sample, grouped by sample as a boxplot
folr1.neg.noNA.gathered %>% 
  ggplot(aes(Samples, Abundance, fill = Type)) +
  geom_boxplot() +
  geom_boxplot(aes(color = Type), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ylab("log2(Abundance)") +
  ggtitle("Boxplot of compound abundances\nAll samples\nFolR1 Exp 1/ Negative Mode") +
  scale_fill_tableau() +
  scale_color_tableau()

# same data format, but as ridge plots
folr1.neg.noNA.gathered %>% 
  ggplot(aes(y = Samples, x = Abundance, fill = Type)) + 
  geom_density_ridges(scale = 15) +
  theme_ridges() +
  scale_y_discrete(expand = c(0.01, 0)) +
  ggtitle("Ridge plot of compound abundances\nAll samples\nFolR1 Exp 1/ Negative Mode") +
  scale_fill_tableau()

folr1.neg.noNA.gathered %>% 
  filter(Type != "solv") %>% 
  ggplot(aes(y = Samples, x = Abundance, fill = Type)) + 
  geom_density_ridges(scale = 15) +
  theme_ridges() +
  scale_y_discrete(expand = c(0.01, 0)) +
  ggtitle("Ridge plot of compound abundances\nAll samples\nFolR1 Exp 1/ Negative Mode") +
  scale_fill_tableau()

# experimental samples only
folr1.neg.noNA.gathered %>% 
  filter(Type == "sample") %>% 
  unite("Condition", Diet:Genotype, sep = "_") %>% 
  ggplot(aes(y = Samples, x = Abundance, fill = Condition)) + 
  geom_density_ridges(scale = 10) +
  theme_ridges() +
  scale_y_discrete(expand = c(0.01, 0)) +
  ggtitle("Ridge plot of compound abundances\nExperimental samples only\nFolR1 Exp 1/ Negative Mode") +
  scale_fill_tableau()

# overlay the distributions for another look
folr1.neg.noNA.gathered %>%
  filter(Type == "sample") %>% 
  unite("Condition", Diet:Genotype, sep = "_") %>% 
  ggplot(aes(Abundance, group = Samples, color = Condition)) +
  geom_density(alpha = 0.8, size = 0.75) +
  xlab("log2(Abundance)") +
  ggtitle("Density plot of compound abundances\nExperimental samples only\nFolR1 Exp 1/ Negative Mode") +
  scale_color_tableau()

3.2.2 Positive Mode

folr1.pos.noNA.gathered <- folr1.pos.noNA %>% 
  gather(
    key = "Metabolite", "Abundance", 
    which(colnames(folr1.pos.noNA) == "pC1"):ncol(folr1.pos.noNA)
    )
# plot all abundances in a sample, grouped by sample as a boxplot
folr1.pos.noNA.gathered %>% 
  ggplot(aes(Samples, Abundance, fill = Type)) +
  geom_boxplot() +
  geom_boxplot(aes(color = Type), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ylab("log2(Abundance)") +
  ggtitle("Boxplot of compound abundances\nAll samples\nFolR1 Exp 1/ Positive Mode") +
  scale_color_tableau() +
  scale_fill_tableau()

Note: filter out set2_purple_632_R1

# same data format, but as ridge plots
folr1.pos.noNA.gathered %>% 
  ggplot(aes(y = Samples, x = Abundance, fill = Type)) + 
  geom_density_ridges(scale = 15) +
  theme_ridges() +
  scale_y_discrete(expand = c(0.01, 0)) +
  ggtitle("Ridge plot of compound abundances\nAll samples\nFolR1 Exp 1/ Positive Mode") +
  scale_fill_tableau()

# experimental samples only
folr1.pos.noNA.gathered %>% 
  filter(Type == "sample") %>% 
  unite("Condition", Diet:Genotype, sep = "_") %>% 
  ggplot(aes(y = Samples, x = Abundance, fill = Condition)) + 
  geom_density_ridges(scale = 10) +
  theme_ridges() +
  scale_y_discrete(expand = c(0.01, 0)) +
  ggtitle("Ridge plot of compound abundances\nExperimental samples only\nFolR1 Exp 1/ Positive Mode") +
  scale_fill_tableau()

# overlay the distributions for another look
folr1.pos.noNA.gathered %>%
  filter(Type == "sample") %>% 
  unite("Condition", Diet:Genotype, sep = "_") %>% 
  ggplot(aes(Abundance, group = Samples, color = Condition)) +
  geom_density(alpha = 0.8, size = 0.75) +
  xlab("log2(Abundance)") +
  ggtitle("Density plot of compound abundances\nExperimental samples only\nFolR1 Exp 1/ Positive Mode") +
  scale_color_tableau()

folr1.pos.noNA <- folr1.pos.noNA %>% 
  filter(Samples != "set2_purple_632_R1")

3.3 Principal Component Analysis

Some plots to understand the relationship between the samples, mix samples, solvent, and empty samples in some cases.

3.3.1 Negative Mode

### PCA on all Samples ###
folr1.neg.full.pca <- folr1.neg.noNA %>% 
  select(starts_with("nC")) %>% 
  mutate_all(scale, center = TRUE, scale = FALSE) %>% 
  as.matrix() %>% 
  prcomp()
plot(
  (folr1.neg.full.pca$sdev ^ 2) * 100 / sum(folr1.neg.full.pca$sdev ^ 2), 
  xlab = "Principal Component",
  ylab = "Variance Explained",
  main = "Percent variance explained by each principal component\nAll samples only\nFolR1 Exp 1 / Negative Mode",
  type = "b"
  )

folr1.neg.full.pca.x <- as.data.frame(folr1.neg.full.pca$x)
row.names(folr1.neg.full.pca.x) <- folr1.neg.noNA$Samples
folr1.neg.full.pca.x <- folr1.neg.full.pca.x %>% 
  bind_cols(folr1.neg.noNA %>% select(Diet:Prot_quant))
folr1.neg.full.pca.x %>% 
  ggplot(aes(x = PC1, y = PC2, color = Type)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC1 (82.4% Var)") +
  ylab("PC2 (3.7% Var)") +
  ggtitle("Principal Component Analysis\nAll Samples\nFolR1 Exp 1 / Negative Mode") +
  scale_color_tableau()

### Samples and mix ###
folr1.neg.smpl.mix.pca <- folr1.neg.noNA %>% 
  filter(Type == "sample" | Type == "mix") %>% 
  select(starts_with("nC")) %>% 
  mutate_all(scale, center = TRUE, scale = FALSE) %>% 
  as.matrix() %>% 
  prcomp()
plot(
  (folr1.neg.smpl.mix.pca$sdev ^ 2) * 100 / sum(folr1.neg.smpl.mix.pca$sdev ^ 2), 
  xlab = "Principal Component",
  ylab = "Variance Explained",
  main = "Percent variance explained by each principal component\nSamples and mix\nFolR1 Exp 1 / Negative Mode",
  type = "b"
  )

folr1.neg.smpl.mix.pca.x <- as.data.frame(folr1.neg.smpl.mix.pca$x)
folr1.neg.smpl.mix.pca.x <- folr1.neg.smpl.mix.pca.x %>% 
  bind_cols(
    folr1.neg.noNA %>% 
      filter(Type == "sample" | Type == "mix") %>% 
      select(Samples, Diet:Prot_quant)
  )
row.names(folr1.neg.smpl.mix.pca.x) <- folr1.neg.smpl.mix.pca.x$Samples
folr1.neg.smpl.mix.pca.x %>% 
  ggplot(aes(x = PC1, y = PC2, color = Genotype, shape = Type)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC1 (30.3% Var)") +
  ylab("PC2 (16.8% Var)") +
  ggtitle("Principal Component Analysis\nSamples and mix\nFolR1 Exp 1 / Negative Mode") +
  scale_color_tableau()

folr1.neg.smpl.mix.pca.x %>% 
  ggplot(aes(x = PC3, y = PC4, color = Genotype, shape = Type)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC3 (7.5% Var)") +
  ylab("PC4 (6.7% Var)") +
  ggtitle("Principal Component Analysis\nSamples and mix\nFolR1 Exp 1 / Negative Mode") +
  scale_color_tableau()

### Experimental Samples Only ###
folr1.neg.smpl.pca <- folr1.neg.noNA %>% 
  filter(Type == "sample") %>% 
  select(starts_with("nC")) %>% 
  mutate_all(scale, center = TRUE, scale = FALSE) %>% 
  as.matrix() %>% 
  prcomp()
plot(
  (folr1.neg.smpl.pca$sdev ^ 2) * 100 / sum(folr1.neg.smpl.pca$sdev ^ 2), 
  xlab = "Principal Component",
  ylab = "Variance Explained",
  main = "Percent variance explained by each principal component\nExperimental samples only\nFolR1 Exp 1 / Negative Mode",
  type = "b"
  )

folr1.neg.smpl.pca.x <- as.data.frame(folr1.neg.smpl.pca$x)
folr1.neg.smpl.pca.x <- folr1.neg.smpl.pca.x %>% 
  bind_cols(
    folr1.neg.noNA %>% 
      filter(Type == "sample") %>% 
      select(Samples, Diet:Prot_quant)
  )
row.names(folr1.neg.smpl.pca.x) <- folr1.neg.smpl.pca.x$Samples
folr1.neg.smpl.pca.x %>% 
  ggplot(aes(x = PC1, y = PC2, shape = Genotype, color = Diet)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC1 (30.2% Var)") +
  ylab("PC2 (17.3% Var)") +
  ggtitle("Principal Component Analysis\nExperimental samples only\nFolR1 Exp 1 / Negative Mode") +
  scale_color_tableau()

folr1.neg.smpl.pca.x %>% 
  ggplot(aes(x = PC3, y = PC4, shape = Genotype, color = Diet)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC3 (7.6% Var)") +
  ylab("PC4 (6.8% Var)") +
  ggtitle("Principal Component Analysis\nExperimental samples only\nFolR1 Exp 1 / Negative Mode") +
  scale_color_tableau()

folr1.neg.covar <- folr1.neg.smpl.pca.x %>% 
  select(Samples, PC1:PC8, Diet:Prot_quant) %>% 
  as_tibble() %>% 
  mutate(Prot_quant = as.numeric(Prot_quant)) %>% 
  gather("PC", "value", PC1:PC8)
folr1.neg.covar %>% 
  ggplot(aes(Litter_ID, value, color = Genotype, shape = Diet)) +
  geom_point(size = 2) +
  facet_wrap(~ PC, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_color_tableau()

folr1.neg.covar %>% 
  mutate(Litter_ID = as.numeric(Litter_ID)) %>% 
  filter(PC == "PC7") %>% 
  lm(value ~ Litter_ID, data = .) %>% 
  summary()

Call:
lm(formula = value ~ Litter_ID, data = .)

Residuals:
   Min     1Q Median     3Q    Max 
-4.846 -1.262 -0.437  1.256  3.689 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -8.703054   1.534147  -5.673 1.85e-07 ***
Litter_ID    0.012825   0.002241   5.722 1.50e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.874 on 86 degrees of freedom
Multiple R-squared:  0.2757,    Adjusted R-squared:  0.2673 
F-statistic: 32.74 on 1 and 86 DF,  p-value: 1.504e-07
# signal difference?
folr1.neg.noNA.gathered %>% 
  filter(Type == "sample") %>% 
  group_by(Samples, Genotype, Diet, Litter_ID) %>% 
  summarize(avg_abun = mean(Abundance)) %>% 
  mutate(Litter_ID = as.numeric(Litter_ID)) %>% 
  ggplot(aes(Litter_ID, avg_abun)) +
  geom_point(size = 2, aes(color = Genotype, shape = Diet)) +
  geom_smooth(method = "lm") +
  scale_color_tableau() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

folr1.neg.covar %>% 
  ggplot(aes(Prot_quant, value, color = Genotype, shape = Diet)) +
  geom_point(size = 2) +
  facet_wrap(~ PC, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_color_tableau()

folr1.neg.covar %>% 
  group_by(PC) %>% 
  summarize(corr = cor(value, Prot_quant))
# A tibble: 8 x 2
  PC       corr
  <chr>   <dbl>
1 PC1   -0.454 
2 PC2    0.0752
3 PC3   -0.225 
4 PC4   -0.0671
5 PC5   -0.277 
6 PC6   -0.252 
7 PC7   -0.436 
8 PC8    0.277 
folr1.neg.covar %>% 
  ggplot(aes(Emb_position, value, color = Genotype, shape = Diet)) +
  geom_point(size = 2) +
  facet_wrap(~ PC, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_color_tableau()

3.3.2 Positive Mode

### PCA on all Samples ###
folr1.pos.full.pca <- folr1.pos.noNA %>% 
  select(starts_with("pC")) %>% 
  mutate_all(scale, center = TRUE, scale = FALSE) %>% 
  as.matrix() %>% 
  prcomp()
plot(
  (folr1.pos.full.pca$sdev ^ 2) * 100 / sum(folr1.pos.full.pca$sdev ^ 2), 
  xlab = "Principal Component",
  ylab = "Variance Explained",
  main = "Percent variance explained by each principal component\nAll samples only\nFolR1 Exp 1 / Positive Mode",
  type = "b"
  )

folr1.pos.full.pca.x <- as.data.frame(folr1.pos.full.pca$x)
row.names(folr1.pos.full.pca.x) <- folr1.pos.noNA$Samples
folr1.pos.full.pca.x <- folr1.pos.full.pca.x %>% 
  bind_cols(folr1.pos.noNA %>% select(Diet:Prot_quant))
folr1.pos.full.pca.x %>% 
  ggplot(aes(x = PC1, y = PC2, color = Type)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC1 (43.4% Var)") +
  ylab("PC2 (9.9% Var)") +
  ggtitle("Principal Component Analysis\nAll Samples\nFolR1 Exp 1 / Positive Mode") +
  scale_color_tableau()

### Samples and mix ###
folr1.pos.smpl.mix.pca <- folr1.pos.noNA %>% 
  filter(Type == "sample" | Type == "mix") %>% 
  select(starts_with("pC")) %>% 
  mutate_all(scale, center = TRUE, scale = FALSE) %>% 
  as.matrix() %>% 
  prcomp()
plot(
  (folr1.pos.smpl.mix.pca$sdev ^ 2) * 100 / sum(folr1.pos.smpl.mix.pca$sdev ^ 2), 
  xlab = "Principal Component",
  ylab = "Variance Explained",
  main = "Percent variance explained by each principal component\nSamples and mix\nFolR1 Exp 1 / Positive Mode",
  type = "b"
  )

folr1.pos.smpl.mix.pca.x <- as.data.frame(folr1.pos.smpl.mix.pca$x)
folr1.pos.smpl.mix.pca.x <- folr1.pos.smpl.mix.pca.x %>% 
  bind_cols(
    folr1.pos.noNA %>% 
      filter(Type == "sample" | Type == "mix") %>% 
      select(Samples, Diet:Prot_quant)
  )
row.names(folr1.pos.smpl.mix.pca.x) <- folr1.pos.smpl.mix.pca.x$Samples
folr1.pos.smpl.mix.pca.x %>% 
  ggplot(aes(x = PC1, y = PC2, color = Genotype, shape = Type)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC1 (19.4% Var)") +
  ylab("PC2 (14.1% Var)") +
  ggtitle("Principal Component Analysis\nSamples and mix\nFolR1 Exp 1 / Positive Mode") +
  scale_color_tableau()

folr1.pos.smpl.mix.pca.x %>% 
  ggplot(aes(x = PC3, y = PC4, color = Genotype, shape = Type)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC3 (10.5% Var)") +
  ylab("PC4 (8.4% Var)") +
  ggtitle("Principal Component Analysis\nSamples and mix\nFolR1 Exp 1 / Positive Mode") +
  scale_color_tableau()

### Experimental Samples Only ###
folr1.pos.smpl.pca <- folr1.pos.noNA %>% 
  filter(Type == "sample") %>% 
  select(starts_with("pC")) %>% 
  mutate_all(scale, center = TRUE, scale = FALSE) %>% 
  as.matrix() %>% 
  prcomp()
plot(
  (folr1.pos.smpl.pca$sdev ^ 2) * 100 / sum(folr1.pos.smpl.pca$sdev ^ 2), 
  xlab = "Principal Component",
  ylab = "Variance Explained",
  main = "Percent variance explained by each principal component\nExperimental samples only\nFolR1 Exp 1 / Positive Mode",
  type = "b"
  )

folr1.pos.smpl.pca.x <- as.data.frame(folr1.pos.smpl.pca$x)
folr1.pos.smpl.pca.x <- folr1.pos.smpl.pca.x %>% 
  bind_cols(
    folr1.pos.noNA %>% 
      filter(Type == "sample") %>% 
      select(Samples, Diet:Prot_quant)
  )
row.names(folr1.pos.smpl.pca.x) <- folr1.pos.smpl.pca.x$Samples
folr1.pos.smpl.pca.x %>% 
  ggplot(aes(x = PC1, y = PC2, shape = Genotype, color = Diet)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC1 (19.6% Var)") +
  ylab("PC2 (14.5% Var)") +
  ggtitle("Principal Component Analysis\nExperimental samples only\nFolR1 Exp 1 / Positive Mode") +
  scale_color_tableau()

folr1.pos.smpl.pca.x %>% 
  ggplot(aes(x = PC3, y = PC4, shape = Genotype, color = Diet)) +
  geom_point(size = 4, alpha = 0.8) +
  xlab("PC3 (10.1% Var)") +
  ylab("PC4 (8.5% Var)") +
  ggtitle("Principal Component Analysis\nExperimental samples only\nFolR1 Exp 1 / Positive Mode") +
  scale_color_tableau()

folr1.pos.covar <- folr1.pos.smpl.pca.x %>% 
  select(Samples, PC1:PC8, Diet:Prot_quant) %>% 
  as_tibble() %>% 
  mutate(Prot_quant = as.numeric(Prot_quant)) %>% 
  gather("PC", "value", PC1:PC8)
folr1.pos.covar %>% 
  ggplot(aes(Litter_ID, value, color = Set, shape = Diet)) +
  geom_point(size = 2) +
  facet_wrap(~ PC, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_color_tableau()

folr1.pos.covar %>% 
  mutate(Litter_ID = as.numeric(Litter_ID)) %>% 
  group_by(PC) %>% 
  summarize(corr = cor(value, Litter_ID))
# A tibble: 8 x 2
  PC       corr
  <chr>   <dbl>
1 PC1    0.131 
2 PC2    0.0211
3 PC3    0.164 
4 PC4   -0.549 
5 PC5    0.146 
6 PC6    0.0503
7 PC7    0.295 
8 PC8    0.158 
folr1.pos.covar %>% 
  mutate(Litter_ID = as.numeric(Litter_ID)) %>% 
  filter(PC == "PC4") %>% 
  lm(value ~ Litter_ID, data = .) %>% 
  summary()

Call:
lm(formula = value ~ Litter_ID, data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-8.2611 -1.4415 -0.2366  1.6574  6.5693 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 12.227320   2.035501   6.007 4.54e-08 ***
Litter_ID   -0.018007   0.002972  -6.059 3.62e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.483 on 85 degrees of freedom
Multiple R-squared:  0.3016,    Adjusted R-squared:  0.2934 
F-statistic: 36.71 on 1 and 85 DF,  p-value: 3.622e-08
# signal difference?
folr1.pos.noNA.gathered %>% 
  filter(Type == "sample" & Samples != "set2_purple_632_R1") %>% 
  group_by(Samples, Genotype, Diet, Litter_ID) %>% 
  summarize(avg_abun = mean(Abundance)) %>% 
  mutate(Litter_ID = as.numeric(Litter_ID)) %>% 
  ggplot(aes(Litter_ID, avg_abun)) +
  geom_point(size = 2, aes(color = Genotype, shape = Diet)) +
  geom_smooth(method = "lm") +
  scale_color_tableau() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

folr1.pos.covar %>% 
  ggplot(aes(Prot_quant, value, color = Genotype, shape = Diet)) +
  geom_point(size = 2) +
  facet_wrap(~ PC, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_color_tableau()

folr1.pos.covar %>% 
  group_by(PC) %>% 
  summarize(corr = cor(value, Prot_quant))
# A tibble: 8 x 2
  PC        corr
  <chr>    <dbl>
1 PC1    0.294  
2 PC2    0.0539 
3 PC3   -0.408  
4 PC4    0.220  
5 PC5    0.00748
6 PC6   -0.401  
7 PC7   -0.0496 
8 PC8   -0.155  
folr1.pos.covar %>% 
  ggplot(aes(Emb_position, value, color = Genotype, shape = Diet)) +
  geom_point(size = 2) +
  facet_wrap(~ PC, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_color_tableau()

folr1.pos.covar %>% 
  ggplot(aes(Set, value)) +
  geom_boxplot() +
  geom_jitter(width = 0.25, size = 2, aes(color = Genotype, shape = Diet)) +
  facet_wrap(~ PC, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_color_tableau()

4 Batch Effects and Signifiance Testing

4.1 Negative Mode

# sample prep
folr1.neg.smpl.data <- folr1.neg.noNA %>% 
  filter(Type == "sample") %>% 
  unite("Condition", Diet:Genotype, sep = "_", remove = FALSE)
folr1.neg.data.for.sva <- as.matrix(
  folr1.neg.smpl.data[, which(
    colnames(folr1.neg.smpl.data) == "nC1"
    ):ncol(folr1.neg.smpl.data)]
  )
row.names(folr1.neg.data.for.sva) <- folr1.neg.smpl.data$Samples
folr1.neg.data.for.sva <- t(folr1.neg.data.for.sva)
# pheno prep
folr1.neg.data.pheno <- as.data.frame(folr1.neg.smpl.data[, 3:10])
row.names(folr1.neg.data.pheno) <- folr1.neg.smpl.data$Samples
# sva calculation
folr1.neg.mod <- model.matrix(~ 0 + as.factor(Condition), data = folr1.neg.data.pheno)
folr1.neg.mod0 <- model.matrix(~ 1, data = folr1.neg.data.pheno)
set.seed(2018)
num.sv(folr1.neg.data.for.sva, folr1.neg.mod, method = "be")
[1] 6
set.seed(2018)
num.sv(folr1.neg.data.for.sva, folr1.neg.mod, method = "leek")
[1] 1
set.seed(2018)
folr1.neg.sv <- sva(folr1.neg.data.for.sva, folr1.neg.mod, folr1.neg.mod0)
Number of significant surrogate variables is:  6 
Iteration (out of 5 ):1  2  3  4  5  
# extract the surrogate variables
folr1.neg.surr.var <- as.data.frame(folr1.neg.sv$sv)
colnames(folr1.neg.surr.var) <- c("S1", "S2", "S3", "S4", "S5", "S6")

folr1.neg.covar <- folr1.neg.covar %>% 
  spread(key = PC, value = value) %>% 
  unite(col = "Condition", Diet:Genotype, remove = FALSE) %>% 
  inner_join(
    cbind(folr1.neg.data.pheno, folr1.neg.surr.var) %>% 
      rownames_to_column("Samples") %>% 
      select(Samples, S1:S6),
    by = "Samples"
    )
folr1.neg.covar %>% 
  select(Samples, Condition:Genotype, S1:S6) %>% 
  gather("surr_var", "value", S1:S6) %>% 
  ggplot(aes(Condition, value)) +
  geom_boxplot() +
  geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Diet)) +
  facet_wrap(~ surr_var, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  scale_color_tableau()

folr1.neg.covar %>% 
  select(Samples:Genotype, PC1:PC8) %>% 
  gather("PC", "value", PC1:PC8) %>% 
  ggplot(aes(Condition, value)) +
  geom_boxplot() +
  geom_point(size = 2, alpha = 0.8, aes(color = Genotype)) +
  facet_wrap(~ PC, scales = "free") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  scale_color_tableau()

folr1.neg.covar %>% 
  select(PC1:PC8, S1:S6) %>% 
  ggcorr(label = TRUE)

folr1.neg.covar %>% 
  ggplot(aes(PC1, S1, color = Set)) +
  geom_point(size = 2, alpha = 0.8)

colnames(folr1.neg.mod) <- c("fa_het", "fa_wt", "met_het", "met_wt", "het", "wt")
# combine the full model matrix and the surrogate variables into one
folr1.neg.design.sv <- cbind(folr1.neg.mod, folr1.neg.surr.var[, 1:2])
#folr1.neg.cont.mat <- makeContrasts(
#  genotype_main = (fa_wt + met_wt + wt) - (fa_het + met_het + het), 
#  diet_main = fa_cd - fa_wt,
#  FA_diff = (fa_cd - fa_wt) - (cd - wt),
#  levels = c("fa_cd", "fa_wt", "cd", "wt", "S1")
#  )
test.matrix <- model.matrix(~Diet*Genotype, folr1.neg.data.pheno) %>% 
 cbind(folr1.neg.surr.var[, 1])
colnames(test.matrix)[7] <- "S1"
# fit the model/design matrix
folr1.neg.eb <- folr1.neg.data.for.sva %>% 
  lmFit(test.matrix) %>% 
  eBayes()
folr1.neg.eb.tidy <- tidy(folr1.neg.eb) #%>% 
#  mutate(
#    adj_pval = p.adjust(p.value, method = "BH"),
#    FC = 2 ^ estimate,
#    change_in_cd = ifelse(FC < 1, "down", "up")
#    ) %>% 
#  rename(compound_short = gene)
folr1.neg.eb.tidy %>% 
  ggplot(aes(p.value, fill = term)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0.05) +
  facet_wrap(~ term, scales = "free_y") +
  scale_fill_tableau()

folr1.neg.eb.tidy %>% 
  rename("compound_short" = gene) %>% 
  filter(p.value < 0.05/ 150) %>% 
  inner_join(folr1.neg.compound.info, by = "compound_short")
# A tibble: 125 x 11
   compound_short term  estimate statistic p.value   lod compound_full
   <chr>          <fct>    <dbl>     <dbl>   <dbl> <dbl> <chr>        
 1 nC129          Diet~   -0.824     -4.07 1.05e-4 1.15  Docosahexaen~
 2 nC21           Diet~   -1.03      -3.77 3.02e-4 0.204 Uracil       
 3 nC28           Diet~   -0.584     -4.24 5.72e-5 1.70  Threonine    
 4 nC54           Diet~   -2.02      -4.22 6.14e-5 1.63  Xanthine     
 5 nC13           Diet~    1.22       3.83 2.50e-4 0.287 2-Hydroxybut~
 6 nC147          Diet~    3.26       3.89 2.00e-4 0.492 UTP          
 7 nC149          Diet~    1.93       3.90 1.97e-4 0.508 ATP          
 8 nC151          Diet~    0.822      4.80 6.87e-6 3.62  Cyclic adeno~
 9 nC154          Diet~    1.34       4.98 3.32e-6 4.29  UDP-Glucose  
10 nC156          Diet~    0.929      4.57 1.68e-5 2.78  UDP-N-Acetyl~
# ... with 115 more rows, and 4 more variables: Formula <chr>, Mass <dbl>,
#   RT <dbl>, CAS_ID <chr>
folr1.neg.noNA %>% 
  filter(Type == "sample") %>% 
  ggplot(aes(Diet, nC149)) +
  geom_boxplot() +
  geom_jitter(aes(color = Genotype), size = 2, width = 0.1) + 
  scale_color_tableau() + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

#folr1.neg.eb.tidy %>% 
#  filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>% 
#  inner_join(folr1.neg.compound.info, by = "compound_short")
# volcano plot
#folr1.neg.eb.tidy %>% 
#  ggplot(aes(estimate, -log10(adj_pval))) +
#  geom_point(size = 2, alpha = 0.5) +
#  geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
#  geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
#  geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
#  xlab("log2(FC)") +
#  ylab("-log10(adjusted p-value)") +
#  ggtitle("Volcano plot\nLrp6KO Exp 2 / Negative Mode")

4.2 Positive Mode

# sample prep

5 Conclusion

6 Session Info

sessionInfo()
R version 3.5.2 (2018-12-20)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17134)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] bindrcpp_0.2.2      ggthemes_4.0.1      ggridges_0.5.1     
 [4] biobroom_1.12.1     broom_0.5.1         limma_3.36.3       
 [7] sva_3.28.0          BiocParallel_1.14.2 genefilter_1.62.0  
[10] mgcv_1.8-26         nlme_3.1-137        heatmaply_0.15.2   
[13] viridis_0.5.1       viridisLite_0.3.0   plotly_4.8.0       
[16] GGally_1.4.0        cowplot_0.9.4       forcats_0.3.0      
[19] stringr_1.3.1       dplyr_0.7.8         purrr_0.3.0        
[22] readr_1.3.1         tidyr_0.8.2         tibble_2.0.1       
[25] ggplot2_3.1.0       tidyverse_1.2.1    

loaded via a namespace (and not attached):
  [1] colorspace_1.4-0     class_7.3-14         modeltools_0.2-22   
  [4] mclust_5.4.2         rstudioapi_0.9.0     flexmix_2.3-14      
  [7] bit64_0.9-7          fansi_0.4.0          AnnotationDbi_1.42.1
 [10] mvtnorm_1.0-8        lubridate_1.7.4      xml2_1.2.0          
 [13] splines_3.5.2        codetools_0.2-15     robustbase_0.93-3   
 [16] knitr_1.21           jsonlite_1.6         annotate_1.58.0     
 [19] cluster_2.0.7-1      kernlab_0.9-27       compiler_3.5.2      
 [22] httr_1.4.0           backports_1.1.3      assertthat_0.2.0    
 [25] Matrix_1.2-15        lazyeval_0.2.1       cli_1.0.1           
 [28] htmltools_0.3.6      tools_3.5.2          gtable_0.2.0        
 [31] glue_1.3.0           reshape2_1.4.3       Rcpp_1.0.0          
 [34] Biobase_2.40.0       cellranger_1.1.0     trimcluster_0.1-2.1 
 [37] gdata_2.18.0         iterators_1.0.10     fpc_2.1-11.1        
 [40] xfun_0.4             rvest_0.3.2          gtools_3.8.1        
 [43] XML_3.98-1.16        dendextend_1.9.0     DEoptimR_1.0-8      
 [46] MASS_7.3-51.1        scales_1.0.0         TSP_1.1-6           
 [49] hms_0.4.2            parallel_3.5.2       RColorBrewer_1.1-2  
 [52] yaml_2.2.0           memoise_1.1.0        gridExtra_2.3       
 [55] reshape_0.8.8        stringi_1.2.4        RSQLite_2.1.1       
 [58] gclus_1.3.2          S4Vectors_0.18.3     foreach_1.4.4       
 [61] seriation_1.2-3      caTools_1.17.1.1     BiocGenerics_0.26.0 
 [64] matrixStats_0.54.0   rlang_0.3.1          pkgconfig_2.0.2     
 [67] prabclus_2.2-7       bitops_1.0-6         evaluate_0.12       
 [70] lattice_0.20-38      bindr_0.1.1          labeling_0.3        
 [73] htmlwidgets_1.3      bit_1.1-14           tidyselect_0.2.5    
 [76] plyr_1.8.4           magrittr_1.5         R6_2.3.0            
 [79] IRanges_2.14.11      gplots_3.0.1.1       generics_0.0.2      
 [82] DBI_1.0.0            pillar_1.3.1         haven_2.0.0         
 [85] whisker_0.3-2        withr_2.1.2          survival_2.43-3     
 [88] RCurl_1.95-4.11      nnet_7.3-12          modelr_0.1.3        
 [91] crayon_1.3.4         utf8_1.1.4           KernSmooth_2.23-15  
 [94] rmarkdown_1.11       grid_3.5.2           readxl_1.2.0        
 [97] data.table_1.12.0    blob_1.1.1           digest_0.6.18       
[100] diptest_0.75-7       webshot_0.5.1        xtable_1.8-3        
[103] stats4_3.5.2         munsell_0.5.0        registry_0.5